I did this a bit flippantly before, but I want to fomalize the process by which we estimate the uncertainty on emulator predictions.
In [63]:
from pearce.emulator import SpicyBuffalo, LemonPepperWet
from pearce.mocks import cat_dict
import numpy as np
from os import path
In [64]:
import matplotlib
#matplotlib.use('Agg')
from matplotlib import pyplot as plt
%matplotlib inline
import seaborn as sns
sns.set()
In [65]:
#xi gg
training_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_lowmsat/PearceRedMagicXiCosmoFixedNd.hdf5'
#test_file= '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/'
test_file = '/scratch/users/swmclau2/xi_zheng07_cosmo_test_lowmsat2/PearceRedMagicXiCosmoFixedNd_Test.hdf5'
In [66]:
em_method = 'gp'
split_method = 'random'
In [67]:
a = 1.0
z = 1.0/a - 1.0
In [68]:
fixed_params = {'z':z}#, 'cosmo': 0}#, 'r':24.06822623}
In [69]:
from glob import glob
In [70]:
hp = []
for fname in sorted(glob('/home/users/swmclau2/Git/pearce/bin/optimization/sloppy_joes_indiv_bins/sloppy*.npy')):
#print fname, len(hp)
hp.append(np.loadtxt(fname))
In [71]:
len(hp)
Out[71]:
In [72]:
param_names = ['ombh2', 'omch2', 'w0', 'ns', 'ln10As', 'H0', 'Neff', 'logM0', 'sigma_logM', 'logM1', 'alpha']
In [73]:
pnames = ['bias', 'amp']
pnames.extend(param_names)
pnames.append('amp')
pnames.extend(param_names)
In [74]:
from collections import defaultdict
metric = []
for _hp in hp:
metric.append(defaultdict(list))
for val, pname in zip(_hp, pnames):
metric[-1][pname].append(val)
In [75]:
np.random.seed(0)
emu = LemonPepperWet(training_file, method = em_method, fixed_params=fixed_params,
custom_mean_function = None, downsample_factor = 0.1)#, hyperparams = {'metric':metric})
In [76]:
emu.get_param_names()
Out[76]:
In [77]:
pred_y, data_y = emu.goodness_of_fit(test_file, statistic = None)
In [78]:
print len(pred_y[0])
In [79]:
test_x, _, test_err, _ = emu.get_data(test_file, emu.fixed_params)
t, old_idxs = emu._whiten(test_x)
In [80]:
train_x, train_y, train_err, info = emu.get_data(training_file, emu.fixed_params)
In [ ]:
In [81]:
len(train_err)
Out[81]:
In [82]:
mean_func_at_params = emu.mean_function(t)
In [83]:
mean_func_at_params.shape
Out[83]:
In [84]:
plt_idx = 4249
print t[0][plt_idx]
print pred_y[0][plt_idx]
print data_y[0][plt_idx]
#print mean_func_at_params[0][plt_idx]
In [85]:
resmat = np.zeros(( 7, 5, 1000, 18))
resmat_log = np.zeros_like(resmat)
predmat = np.zeros(( 7, 5, 1000, 18))
datamat = np.zeros(( 7, 5, 1000, 18))
for bin_no in xrange(len(pred_y)):
py, dy = pred_y[bin_no], data_y[bin_no]
for cosmo_no in xrange(7):
cpy, cdy = py[cosmo_no*5000:(cosmo_no+1)*5000], dy[cosmo_no*5000:(cosmo_no+1)*5000]
#for realization_no in xrange(5):
for hod_no in xrange(1000):
crpy, crdy = cpy[hod_no::1000], cdy[hod_no::1000]
#for hod_no in xrange(1000):
#print crpy.std()
predmat[cosmo_no, :, hod_no , bin_no] = crpy
datamat[cosmo_no, :, hod_no , bin_no] = crdy
resmat[cosmo_no, :, hod_no , bin_no] = 10**crpy - 10**crdy
resmat_log[cosmo_no, :, hod_no , bin_no] = crpy - crdy
In [86]:
resmat.shape
Out[86]:
In [87]:
N = 5
for cosmo_no in xrange(7):
for hod_no in xrange(1000):
r = resmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
print r.shape
scovmat = r.dot(r.T)/r.shape[1]
#fig = plt.figure(figsize = (10, 6))
#plt.subplot(121)
im = plt.imshow(np.log10(np.abs(scovmat) ))
plt.colorbar(im)
#plt.subplot(122)
#pred = 10**predmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
#print pred.shape
#data = datamat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C').T
#scovmat2 = np.cov(pred)
#im = plt.imshow(np.log10(np.abs(scovmat2) ), cmap = 'viridis', vmin = -5,vmax = 9)
#plt.colorbar(im)
plt.show()
N-=1
if N == 0:
break
if N==0:
break
In [88]:
N = 50
for cosmo_no in xrange(7):
for hod_no in xrange(1000):
pred = 10**predmat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C')
data = 10**datamat[cosmo_no, :, hod_no].reshape((-1, 18), order = 'C')
for p,d in zip(pred,data):
plt.plot(emu.scale_bin_centers, np.abs(p-d)/d)
#for d in data:
# plt.plot(emu.scale_bin_centers, d)
plt.xscale('log')
plt.yscale('log')
plt.show()
N-=1
if N == 0:
break
if N==0:
break
In [89]:
resmat_flat = resmat.reshape((-1, 18)).T
datamat_flat = datamat.reshape((-1, 18)).T
#resmat_hodrealav = np.mean(resmat_realav, axis = 1)
In [90]:
r_idx = 10
t_bin = t[r_idx]
acc_bin = np.abs(resmat_flat[r_idx])/datamat_flat[r_idx]
In [91]:
t_bin.shape, acc_bin.shape
Out[91]:
In [92]:
percentiles = np.percentile(acc_bin, range(101))
norm_acc_bin = np.digitize(acc_bin, percentiles)
#norm_acc_bin = 100*((acc_bin - acc_bin.min())/acc_bin.max()).astype(int)
In [93]:
palette = sns.diverging_palette(220, 20, n=len(percentiles)-1, as_cmap=True)
#sns.set_palette(palette)
In [94]:
pnames = emu.get_param_names()
In [95]:
for axes1 in xrange(7,11):
for axes2 in xrange(axes1+1, 11):
cbar = plt.scatter(t_bin[:,axes1 ], t_bin[:,axes2], c = norm_acc_bin,cmap = palette, alpha = 0.2)
plt.colorbar(cbar)
plt.xlabel(pnames[axes1])
plt.ylabel(pnames[axes2])
#plt.gray()
plt.show()
In [96]:
test_err_bin = test_err[:, r_idx, r_idx]
In [97]:
plt.hist(np.log10(test_err_bin) )
Out[97]:
In [98]:
plt.hist(np.log10(np.sqrt(emu.yerr[r_idx])) )
Out[98]:
In [99]:
len(emu.yerr)
Out[99]:
In [100]:
percentiles = np.percentile(np.log10(test_err_bin), [0, 10,20,30,40,50,60,70,80,90,100])
norm_err_bin = np.digitize(np.log10(test_err_bin), percentiles)
In [101]:
#relevant stat is uncertainty in training error, not test error
for axes1 in xrange(7,11):
for axes2 in xrange(axes1+1, 11):
cbar = plt.scatter(t_bin[:,axes1 ], t_bin[:,axes2], c = norm_err_bin,cmap = palette, alpha = 0.2)
plt.colorbar(cbar)
plt.xlabel(pnames[axes1])
plt.ylabel(pnames[axes2])
#plt.gray()
plt.show()
In [102]:
test_err_diag= np.array([test_err[:, r_idx, r_idx] for r_idx in xrange(emu.n_bins)] )
In [103]:
test_err_diag.mean(axis = 1)
Out[103]:
In [112]:
np.sqrt(np.mean(np.square(np.abs(resmat_flat)/(10**datamat_flat) )))
Out[112]:
In [104]:
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_flat)/(10**datamat_flat), axis = 1) )
plt.plot(emu.scale_bin_centers, np.mean(np.abs(test_err_diag), axis = 1) )
plt.xscale('log')
In [105]:
resmat_log_flat = resmat_log.reshape((-1, 18)).T
In [106]:
np.mean(np.array([ye/np.abs(y+1e-9) for ye, y in zip(emu.yerr, emu.y)]) , axis = 1)
Out[106]:
In [107]:
plt.plot(emu.scale_bin_centers, np.mean(np.abs(resmat_log_flat)/np.abs(datamat_flat), axis = 1), label = 'Pred acc' )
#plt.plot(emu.scale_bin_centers, np.mean(np.array([ye/np.abs(y+1e-9) for ye, y in zip(emu.yerr, emu.y)]), axis = 1), label = 'Training Err' )
plt.yscale('log')
plt.legend(loc ='best')
plt.xscale('log')
In [108]:
for res, dat in zip(resmat_flat, 10**datamat_flat):
plt.hist(res/dat, bins = 30)#, bins = np.linspace(-1, 1, 30))
plt.yscale('log')
plt.show()
In [109]:
scovmat = resmat_flat.dot(resmat_flat.T)/(len(pred_y[0])-1)
In [110]:
im = plt.imshow(np.log10(scovmat) )
plt.colorbar(im)
plt.show()
In [111]:
print np.sqrt(np.diag(scovmat))
In [ ]:
In [ ]:
In [ ]: